import pandas as pd
import os
# Load the cleaned table; the "Abb" column is read as a pandas categorical.
dat_clean = pd.read_csv("bask_clean.csv", dtype={"Abb": "category"})
dat_clean.head()

# Conjugate Beta-Binomial update: with a Beta(a, b) prior on the success
# probability θ and Y successes observed in n trials, the posterior is
#     θ | Y ~ Beta(a + Y, b + n − Y).
# Here Y is the "Wins" column and n is the "Games" column.
yy = dat_clean["Wins"]
n = dat_clean["Games"]

# Prior pseudo-counts: "Proj" successes and (82 - "Proj") failures.
# NOTE(review): presumably "Proj" is a projected win total for an 82-game
# season -- confirm against the data source.
a = dat_clean["Proj"]
b = 82 - a

# Multiplier applied to the prior pseudo-counts (smaller = weaker prior).
factor = 0.4
from scipy import stats
from scipy.stats import beta
import numpy as np
# Frozen scaled-prior distribution for the row labelled 1. As a bare
# expression it is only echoed when run interactively.
beta(a[1]*factor, b[1]*factor)
import matplotlib.pyplot as plt

# Density of that scaled prior, evaluated on a 200-point grid over [0, 1].
x = np.linspace(0, 1, 200)
y = stats.beta(a[1]*factor, b[1]*factor).pdf(x)
plt.plot(x, y)
# Finish the figure here. Without this, when the file is run as a script the
# prior curve stays on the current axes and is drawn into the first per-row
# posterior figure produced by the loop that follows.
plt.show()
# Posterior per row with the prior pseudo-counts scaled by 0.4.
# For every row: evaluate the Beta posterior density on a fixed grid, record
# the grid index of its maximum (the MAP position) in `maps`, and plot the
# density on a 0-82 axis with a vertical line at the row's "Final" value.
factor = 0.4
maps = []
x = np.linspace(0, 1, 200)  # same grid for every row
for row in range(len(dat_clean)):
    posterior = stats.beta(
        a[row]*factor + yy[row],
        b[row]*factor + n[row] - yy[row],
    )
    y = posterior.pdf(x)
    maps.append(np.argmax(y))

    team = dat_clean["Team"][row]
    season = dat_clean["Year"][row]
    plt.plot(x * 82, y)
    plt.title(f"{team!s} {season!s}")
    plt.axvline(x=dat_clean["Final"][row])
    plt.show()
# Baseline: absolute gap between "Final" and "Proj", keeping only rows where
# that gap is defined (not NaN).
dat_diff = np.abs(dat_clean['Final'] - dat_clean['Proj'])
dat_diff = dat_diff[np.logical_not(np.isnan(dat_diff))]

# Convert the MAP grid indices into values on the 0-82 scale.
# The densities were evaluated on np.linspace(0, 1, 200), whose spacing is
# 1/199 (both endpoints included), so index k sits at k/199 -- dividing by
# 200 would bias every estimate slightly low.
maps = np.asarray(maps)
exes = (maps / (200 - 1)) * 82

# Drop positions 3, 7, 11, ... so the estimates line up with the rows that
# have a "Final" value.
# NOTE(review): presumably every fourth row is the one with a missing
# "Final" -- confirm against the data.
test = np.delete(exes, np.arange(3, exes.size, 4))
dat_final = dat_clean["Final"][np.logical_not(np.isnan(dat_clean["Final"]))]

# Absolute error of the MAP estimate against the final value.
diffs_final = np.abs(dat_final - test)
print(np.mean(diffs_final))
print(np.median(diffs_final))
print(test[0])

plt.hist(diffs_final, density = True)
plt.xlabel("MAP vs. Results Absolute Difference")
plt.title("Moderate Prior Approach")
# Finish the histogram figure so it is not drawn into the next plot when the
# file is run as a script.
plt.show()
# Posterior per row under a flat Beta(1, 1) prior.
# For every row: evaluate the posterior density on a fixed grid, record the
# grid index of its maximum in `maps`, and plot the density on a 0-82 axis
# with a vertical line at the row's "Final" value.
maps = []
x = np.linspace(0, 1, 200)  # same grid for every row
for row in range(len(dat_clean)):
    wins = yy[row]
    losses = n[row] - yy[row]
    y = stats.beta(1 + wins, 1 + losses).pdf(x)
    maps.append(np.argmax(y))

    team = dat_clean["Team"][row]
    season = dat_clean["Year"][row]
    plt.plot(x * 82, y)
    plt.title(f"{team!s} {season!s}")
    plt.axvline(x=dat_clean["Final"][row])
    plt.show()
# Convert the MAP grid indices into values on the 0-82 scale.
# The densities were evaluated on np.linspace(0, 1, 200), whose spacing is
# 1/199 (both endpoints included), so index k sits at k/199 -- dividing by
# 200 would bias every estimate slightly low.
maps = np.asarray(maps)
exes = (maps / (200 - 1)) * 82

# Drop positions 3, 7, 11, ... so the estimates line up with the rows that
# have a "Final" value.
# NOTE(review): presumably every fourth row is the one with a missing
# "Final" -- confirm against the data.
test = np.delete(exes, np.arange(3, exes.size, 4))
dat_final = dat_clean["Final"][np.logical_not(np.isnan(dat_clean["Final"]))]

# Absolute error of the MAP estimate against the final value.
diffs_final = np.abs(dat_final - test)
print(np.mean(diffs_final))
print(np.median(diffs_final))
print(test[0])

plt.hist(diffs_final, density = True)
# Finish the histogram figure so it is not drawn into the next plot when the
# file is run as a script.
plt.show()
# Posterior per row with the prior pseudo-counts used at full weight
# (factor = 1).
# For every row: evaluate the Beta posterior density on a fixed grid, record
# the grid index of its maximum in `maps`, and plot the density on a 0-82 axis
# with a vertical line at the row's "Final" value.
factor = 1
maps = []
x = np.linspace(0, 1, 200)  # same grid for every row
for row in range(len(dat_clean)):
    posterior = stats.beta(
        a[row]*factor + yy[row],
        b[row]*factor + n[row] - yy[row],
    )
    y = posterior.pdf(x)
    maps.append(np.argmax(y))

    team = dat_clean["Team"][row]
    season = dat_clean["Year"][row]
    plt.plot(x * 82, y)
    plt.title(f"{team!s} {season!s}")
    plt.axvline(x=dat_clean["Final"][row])
    plt.show()
# Convert the MAP grid indices into values on the 0-82 scale.
# The densities were evaluated on np.linspace(0, 1, 200), whose spacing is
# 1/199 (both endpoints included), so index k sits at k/199 -- dividing by
# 200 would bias every estimate slightly low.
maps = np.asarray(maps)
exes = (maps / (200 - 1)) * 82

# Drop positions 3, 7, 11, ... so the estimates line up with the rows that
# have a "Final" value.
# NOTE(review): presumably every fourth row is the one with a missing
# "Final" -- confirm against the data.
test = np.delete(exes, np.arange(3, exes.size, 4))
dat_final = dat_clean["Final"][np.logical_not(np.isnan(dat_clean["Final"]))]

# Absolute error of the MAP estimate against the final value.
diffs_final = np.abs(dat_final - test)
print(np.mean(diffs_final))
print(np.median(diffs_final))
print(test[0])

plt.hist(diffs_final, density = True)
# Finish the histogram figure so it is not drawn into the next plot when the
# file is run as a script.
plt.show()
# Posterior per row with the prior scaled by 0.4, this time also collecting
# the equal-tailed 90% credible interval (5th and 95th posterior percentiles),
# expressed on the 0-82 scale.
factor = 0.4
maps = []
l_bound = []
u_bound = []
x = np.linspace(0, 1, 200)  # same grid for every row
for row in range(len(dat_clean)):
    # One frozen posterior serves the density and both quantiles.
    posterior = stats.beta(
        a[row]*factor + yy[row],
        b[row]*factor + n[row] - yy[row],
    )
    y = posterior.pdf(x)
    maps.append(np.argmax(y))
    l_bound.append(posterior.ppf(0.05) * 82)
    u_bound.append(posterior.ppf(0.95) * 82)

    team = dat_clean["Team"][row]
    season = dat_clean["Year"][row]
    plt.plot(x * 82, y)
    plt.title(f"{team!s} {season!s}")
    plt.axvline(x=dat_clean["Final"][row])
    plt.show()
# Coverage of the 90% credible intervals: the share of rows whose "Final"
# value lies strictly between the lower and upper interval bounds.
dat_final = dat_clean["Final"][np.logical_not(np.isnan(dat_clean["Final"]))]
dat_final = np.asarray(dat_final)
l_bound = np.asarray(l_bound)
u_bound = np.asarray(u_bound)

# Drop positions 3, 7, 11, ... so the bounds line up with the rows that have
# a "Final" value.
# NOTE(review): presumably every fourth row is the one with a missing
# "Final" -- confirm against the data.
l_bound1 = np.delete(l_bound, np.arange(3, l_bound.size, 4))
u_bound1 = np.delete(u_bound, np.arange(3, u_bound.size, 4))

# Both sides of the test must use the observed final value. Comparing the
# upper bound against the MAP array `test` (left over from an earlier
# section) does not measure interval coverage.
inside = (dat_final > l_bound1) & (dat_final < u_bound1)
count = int(np.count_nonzero(inside))
print(count / len(dat_final))
print(l_bound1[0])
print(u_bound1[0])

# Previously recorded results. They were obtained before the upper-bound
# comparison was corrected, so re-run to refresh them:
#   80 percent of predictions contained in 90% credible interval for factor of 1
#   93.3% for factor of 0.4
# Posterior per row under a flat Beta(1, 1) prior, also collecting the
# equal-tailed 90% credible interval (5th and 95th posterior percentiles),
# expressed on the 0-82 scale.
maps = []
l_bound = []
u_bound = []
x = np.linspace(0, 1, 200)  # same grid for every row
for row in range(len(dat_clean)):
    # One frozen posterior serves the density and both quantiles.
    posterior = stats.beta(1 + yy[row], 1 + n[row] - yy[row])
    y = posterior.pdf(x)
    maps.append(np.argmax(y))
    l_bound.append(posterior.ppf(0.05) * 82)
    u_bound.append(posterior.ppf(0.95) * 82)

    team = dat_clean["Team"][row]
    season = dat_clean["Year"][row]
    plt.plot(x * 82, y)
    plt.title(f"{team!s} {season!s}")
    plt.axvline(x=dat_clean["Final"][row])
    plt.show()
# Coverage of the flat-prior 90% credible intervals: the share of rows whose
# "Final" value lies strictly between the lower and upper interval bounds.
dat_final = dat_clean["Final"][np.logical_not(np.isnan(dat_clean["Final"]))]
dat_final = np.asarray(dat_final)
l_bound = np.asarray(l_bound)
u_bound = np.asarray(u_bound)

# Drop positions 3, 7, 11, ... so the bounds line up with the rows that have
# a "Final" value.
# NOTE(review): presumably every fourth row is the one with a missing
# "Final" -- confirm against the data.
l_bound1 = np.delete(l_bound, np.arange(3, l_bound.size, 4))
u_bound1 = np.delete(u_bound, np.arange(3, u_bound.size, 4))

# Both sides of the test must use the observed final value. Comparing the
# upper bound against the MAP array `test` (left over from an earlier
# section) does not measure interval coverage.
inside = (dat_final > l_bound1) & (dat_final < u_bound1)
count = int(np.count_nonzero(inside))
# Print the ratio so it is visible when the file is run as a script.
print(count / len(dat_final))

# Previously recorded result. It was obtained before the upper-bound
# comparison was corrected, so re-run to refresh it:
#   95.55% for flat prior.